In [1]:
import numpy as np
import pandas as pd
In [2]:
import glob

path = '/home/yurli/Molnlycke/case2/csv'
all_files = glob.glob(path + "/*.csv")
In [3]:
import os

df_names = []
df = {}

for filename in all_files:
    df_name = os.path.splitext(os.path.basename(filename))[0]
    df[df_name] = pd.read_csv(filename, index_col = None, header = 0)
    df_names.append(df_name)
    print(df_name, df[df_name].shape)
medications (42989, 13)
providers (5855, 12)
payer_transitions (3801, 5)
imaging_studies (855, 10)
supplies (0, 6)
payers (10, 21)
allergies (597, 6)
procedures (34981, 8)
organizations (1119, 11)
conditions (8376, 6)
careplans (3483, 9)
encounters (53346, 15)
devices (78, 7)
immunizations (15478, 6)
patients (1171, 25)
observations (299697, 8)
In [4]:
# len(set(df['patients']['Id'])) == df['patients'].shape[0]
In [5]:
patientID = '76982e06-f8b8-4509-9ca3-65a99c8650fe'
In [6]:
df['patients']['Name'] = df['patients']['FIRST'] + ' ' + df['patients']['LAST']
zipCode = []
for i in df['patients']['ZIP']:
    try:
        zipCode.append(str(int(i)))
    except:
        zipCode.append('')
df['patients']['ZIP'] = zipCode
df['patients']['Address'] = df['patients']['ADDRESS'] + ', ' + df['patients']['ZIP'] + ' ' + df['patients']['CITY'] + ', ' + df['patients']['STATE'] + ', ' + df['patients']['COUNTY']
df['patients']['MARITAL'] = df['patients']['MARITAL'].fillna('Unknown')
In [7]:
import dateutil.parser

birth = []
for patient in df['encounters']['PATIENT']:
    birthDate = df['patients'][df['patients']['Id'] == patient]['BIRTHDATE'].values[0]
    birthYear = list(pd.DatetimeIndex([birthDate]).year)
    birth.append(birthYear)
df['encounters']['birthYear'] = np.asarray(birth).reshape(-1)
df['encounters']['encounterYear'] = pd.DatetimeIndex(df['encounters']['START']).year
df['encounters']['age'] = df['encounters']['encounterYear'] - df['encounters']['birthYear']
df['encounters']['START'] = [dateutil.parser.parse(x).strftime("%Y-%m-%d") for x in df['encounters']['START']]
df['encounters']['STOP'] = [dateutil.parser.parse(x).strftime("%Y-%m-%d") for x in df['encounters']['STOP']]
In [8]:
df['observations']['UNITS'] = df['observations']['UNITS'].fillna('')
df['observations']['DATE'] = [dateutil.parser.parse(x).strftime("%Y-%m-%d %H:%M:%S") for x in df['observations']['DATE']]
df['observations']['observations'] = df['observations']['DATE'] + ': ' + df['observations']['DESCRIPTION'] + ': ' + df['observations']['VALUE'] + ' ' + df['observations']['UNITS']
In [9]:
df['conditions']['STOP'] = df['conditions']['STOP'].fillna('')
In [10]:
df['medications']['START'] = [dateutil.parser.parse(x).strftime("%Y-%m-%d") for x in df['medications']['START']]

stop = []
for x in df['medications']['STOP']:
    try:
        stop.append(dateutil.parser.parse(x).strftime("%Y-%m-%d"))
    except:
        stop.append('')
df['medications']['STOP'] = stop
In [11]:
df['careplans']['STOP'] = df['careplans']['STOP'].fillna('')
In [12]:
df['immunizations']['DATE'] = [dateutil.parser.parse(x).strftime("%Y-%m-%d") for x in df['immunizations']['DATE']]
In [13]:
df['procedures']['DATE'] = [dateutil.parser.parse(x).strftime("%Y-%m-%d") for x in df['procedures']['DATE']]
In [14]:
# df['imaging_studies']['DATE'] = [dateutil.parser.parse(x).strftime("%Y-%m-%d") for x in df['imaging_studies']['DATE']]
In [15]:
def care_data_collection(patientID):
    encounters_sub = df['encounters'][df['encounters']['PATIENT'] == patientID]   
    observations_sub = df['observations'][df['observations']['PATIENT'] == patientID]   
    conditions_sub = df['conditions'][df['conditions']['PATIENT'] == patientID]    
    medications_sub = df['medications'][df['medications']['PATIENT'] == patientID]    
    careplans_sub = df['careplans'][df['careplans']['PATIENT'] == patientID]      
    immunizations_sub = df['immunizations'][df['immunizations']['PATIENT'] == patientID]   
    procedures_sub = df['procedures'][df['procedures']['PATIENT'] == patientID]
    return encounters_sub, observations_sub, conditions_sub, medications_sub, careplans_sub, immunizations_sub, procedures_sub
In [16]:
separator = ", "
def patient_Data_printer(patientID):
    name = df['patients'][df['patients']['Id'] == patientID]['Name'].values[0]
    race = df['patients'][df['patients']['Id'] == patientID]['RACE'].values[0]
    ethnicity = df['patients'][df['patients']['Id'] == patientID]['ETHNICITY'].values[0]
    gender = df['patients'][df['patients']['Id'] == patientID]['GENDER'].values[0]
    birthDate = df['patients'][df['patients']['Id'] == patientID]['BIRTHDATE'].values[0]
    ifMarital = df['patients'][df['patients']['Id'] == patientID]['MARITAL'].values[0]
    address = df['patients'][df['patients']['Id'] == patientID]['Address'].values[0]
    
    patient_allergies = 'N/A'
    if df['allergies'][df['allergies']['PATIENT'] == patientID].shape[0] != 0:
        patient_allergies = separator.join(df['allergies'][df['allergies']['PATIENT'] == patientID]['DESCRIPTION'])
    print(name)
    print('============================')
    print('Race: ' + race)
    print('Ethnicity: ' + ethnicity)
    print('Gender: ' + gender)
    print('Birth Date: ' + birthDate)
    print('Marital Status: ' + ifMarital)
    print('Address: ' + address)
    print('==========================================================================================')
    print('Allergies: ' + patient_allergies)
    print('==========================================================================================')  

    
    encounter_sub, observation_sub, conditions_sub, medications_sub, careplans_sub, immunizations_sub, procedures_sub = care_data_collection(patientID)
    
    for encounter_id in encounter_sub['Id']:
        observation_perEncounter = observation_sub[observation_sub['ENCOUNTER'] == encounter_id]
        condition_perEncounter = conditions_sub[conditions_sub['ENCOUNTER'] == encounter_id]
        medication_perEncounter = medications_sub[medications_sub['ENCOUNTER'] == encounter_id]
        careplan_perEncounter = careplans_sub[careplans_sub['ENCOUNTER'] == encounter_id]
        immunization_perEncounter = immunizations_sub[immunizations_sub['ENCOUNTER'] == encounter_id]
        procedure_perEncounter = procedures_sub[procedures_sub['ENCOUNTER'] == encounter_id]
        
        print('Encounter: ')
        print(encounter_sub[encounter_sub['Id'] == encounter_id]['START'].values[0] + ': ' + 
              encounter_sub[encounter_sub['Id'] == encounter_id]['DESCRIPTION'].values[0] + '  (class: ' +
              encounter_sub[encounter_sub['Id'] == encounter_id]['ENCOUNTERCLASS'].values[0] + ')')
        
        if observation_perEncounter.shape[0] != 0:
            print('Observations:')
            print(*observation_perEncounter['observations'], sep = "\n")
        
        if condition_perEncounter.shape[0] != 0:
            print('Condition:')
            print(condition_perEncounter['START'].values[0] + ' -- ' + condition_perEncounter['STOP'].values[0] + ': ' +
                  condition_perEncounter['DESCRIPTION'].values[0])    
            
        if medication_perEncounter.shape[0] != 0:
            print('Medications:')
            print(medication_perEncounter['START'].values[0] + ' -- ' + medication_perEncounter['STOP'].values[0] + ': ' +
                  medication_perEncounter['DESCRIPTION'].values[0])
        
        if careplan_perEncounter.shape[0] != 0:
            print('Care Plans:')
            print(careplan_perEncounter['START'].values[0] + ' -- ' + careplan_perEncounter['STOP'].values[0] + ': ' +
                  careplan_perEncounter['DESCRIPTION'].values[0])
            
        if immunization_perEncounter.shape[0] != 0:
            print('Immunization:')
            print(immunization_perEncounter['DATE'].values[0] + ': ' + immunization_perEncounter['DESCRIPTION'].values[0])
        
        if procedure_perEncounter.shape[0] != 0:
            print('Procedure:')
            print(procedure_perEncounter['DATE'].values[0] + ': ' + procedure_perEncounter['DESCRIPTION'].values[0])
        
        print('------------------------------------------------------------------------------------------')
In [17]:
patient_Data_printer(patientID)
Christal240 Brown30
============================
Race: white
Ethnicity: nonhispanic
Gender: F
Birth Date: 1982-09-01
Marital Status: S
Address: 1060 Hansen Overpass Suite 86, 2118 Boston, Massachusetts, Suffolk County
==========================================================================================
Allergies: Latex allergy, Shellfish allergy
==========================================================================================
Encounter: 
1982-10-25: Encounter for problem  (class: ambulatory)
Medications:
1982-10-25 -- : diphenhydrAMINE Hydrochloride 25 MG Oral Tablet
Care Plans:
1982-10-25 -- : Self-care interventions (procedure)
------------------------------------------------------------------------------------------
Encounter: 
2000-06-14: Encounter for problem  (class: ambulatory)
Medications:
2000-06-14 -- : ferrous sulfate 325 MG Oral Tablet
------------------------------------------------------------------------------------------
Encounter: 
2010-03-27: Consultation for treatment  (class: outpatient)
Medications:
2010-03-27 -- 2011-03-22: Etonogestrel 68 MG Drug Implant
------------------------------------------------------------------------------------------
Encounter: 
2010-07-07: Encounter for symptom  (class: ambulatory)
Observations:
2010-07-07 18:19:08: Body temperature: 37.1 Cel
Condition:
2010-07-07 -- 2010-07-17: Acute viral pharyngitis (disorder)
Procedure:
2010-07-07: Throat culture (procedure)
------------------------------------------------------------------------------------------
Encounter: 
2010-11-10: General examination of patient (procedure)  (class: wellness)
Observations:
2010-11-10 18:19:08: Body Height: 162.4 cm
2010-11-10 18:19:08: Pain severity - 0-10 verbal numeric rating [Score] - Reported: 2.0 {score}
2010-11-10 18:19:08: Body Weight: 71.2 kg
2010-11-10 18:19:08: Body Mass Index: 27.0 kg/m2
2010-11-10 18:19:08: Diastolic Blood Pressure: 79.0 mm[Hg]
2010-11-10 18:19:08: Systolic Blood Pressure: 117.0 mm[Hg]
2010-11-10 18:19:08: Heart rate: 100.0 /min
2010-11-10 18:19:08: Respiratory rate: 12.0 /min
2010-11-10 18:19:08: Leukocytes [#/volume] in Blood by Automated count: 4.4 10*3/uL
2010-11-10 18:19:08: Erythrocytes [#/volume] in Blood by Automated count: 4.2 10*6/uL
2010-11-10 18:19:08: Hemoglobin [Mass/volume] in Blood: 13.7 g/dL
2010-11-10 18:19:08: Hematocrit [Volume Fraction] of Blood by Automated count: 41.4 %
2010-11-10 18:19:08: MCV [Entitic volume] by Automated count: 91.8 fL
2010-11-10 18:19:08: MCH [Entitic mass] by Automated count: 29.5 pg
2010-11-10 18:19:08: MCHC [Mass/volume] by Automated count: 33.7 g/dL
2010-11-10 18:19:08: Erythrocyte distribution width [Entitic volume] by Automated count: 40.1 fL
2010-11-10 18:19:08: Platelets [#/volume] in Blood by Automated count: 276.0 10*3/uL
2010-11-10 18:19:08: Platelet distribution width [Entitic volume] in Blood by Automated count: 381.6 fL
2010-11-10 18:19:08: Platelet mean volume [Entitic volume] in Blood by Automated count: 10.3 fL
2010-11-10 18:19:08: Tobacco smoking status NHIS: Never smoker 
Immunization:
2010-11-10: Influenza  seasonal  injectable  preservative free
------------------------------------------------------------------------------------------
Encounter: 
2011-06-08: Patient encounter procedure  (class: outpatient)
Procedure:
2011-06-08: Removal of subcutaneous contraceptive
------------------------------------------------------------------------------------------
Encounter: 
2011-08-03: Prenatal initial visit  (class: ambulatory)
Condition:
2011-08-03 -- 2012-02-29: Normal pregnancy
Care Plans:
2011-08-03 -- 2012-02-29: Routine antenatal care
Procedure:
2011-08-03: Standard pregnancy test
------------------------------------------------------------------------------------------
Encounter: 
2011-08-31: Prenatal visit  (class: ambulatory)
Procedure:
2011-08-31: Evaluation of uterine fundal height
------------------------------------------------------------------------------------------
Encounter: 
2011-09-28: Prenatal visit  (class: ambulatory)
Procedure:
2011-09-28: Fetal anatomy study
------------------------------------------------------------------------------------------
Encounter: 
2011-10-26: Prenatal visit  (class: ambulatory)
Procedure:
2011-10-26: Evaluation of uterine fundal height
------------------------------------------------------------------------------------------
Encounter: 
2011-11-23: Prenatal visit  (class: ambulatory)
Procedure:
2011-11-23: Evaluation of uterine fundal height
------------------------------------------------------------------------------------------
Encounter: 
2011-12-21: Prenatal visit  (class: ambulatory)
Procedure:
2011-12-21: Hemoglobin / Hematocrit / Platelet count
------------------------------------------------------------------------------------------
Encounter: 
2012-01-18: Prenatal visit  (class: ambulatory)
Procedure:
2012-01-18: Evaluation of uterine fundal height
------------------------------------------------------------------------------------------
Encounter: 
2012-02-15: Prenatal visit  (class: ambulatory)
Procedure:
2012-02-15: Streptococcus pneumoniae group B antigen test
------------------------------------------------------------------------------------------
Encounter: 
2012-02-29: Obstetric emergency hospital admission  (class: emergency)
Procedure:
2012-02-29: Episiotomy
------------------------------------------------------------------------------------------
Encounter: 
2012-04-11: Postnatal visit  (class: ambulatory)
Procedure:
2012-04-11: Physical examination following birth
------------------------------------------------------------------------------------------
Encounter: 
2012-05-13: Encounter for symptom  (class: outpatient)
Condition:
2012-05-13 -- 2013-11-13: Otitis media
Medications:
2012-05-13 -- 2012-05-27: Cefuroxime 250 MG Oral Tablet
------------------------------------------------------------------------------------------
Encounter: 
2012-07-18: Prenatal initial visit  (class: ambulatory)
Condition:
2012-07-18 -- 2012-08-08: Normal pregnancy
Procedure:
2012-07-18: Standard pregnancy test
------------------------------------------------------------------------------------------
Encounter: 
2012-08-01: Patient-initiated encounter  (class: ambulatory)
Procedure:
2012-08-01: Counseling for termination of pregnancy
------------------------------------------------------------------------------------------
Encounter: 
2012-08-08: Prenatal visit  (class: ambulatory)
Procedure:
2012-08-08: Pregnancy termination care
------------------------------------------------------------------------------------------
Encounter: 
2012-09-24: Encounter for symptom  (class: ambulatory)
Condition:
2012-09-24 -- 2012-11-01: Escherichia coli urinary tract infection
Medications:
2012-10-25 -- 2012-11-01: Nitrofurantoin 5 MG/ML Oral Suspension
Care Plans:
2012-10-25 -- 2012-11-01: Urinary tract infection care
------------------------------------------------------------------------------------------
Encounter: 
2013-01-09: Prenatal initial visit  (class: ambulatory)
Condition:
2013-01-09 -- 2013-08-14: Normal pregnancy
Care Plans:
2013-01-09 -- 2013-08-14: Routine antenatal care
Procedure:
2013-01-09: Standard pregnancy test
------------------------------------------------------------------------------------------
Encounter: 
2013-01-07: Encounter for symptom  (class: ambulatory)
Condition:
2013-01-07 -- 2013-01-21: Viral sinusitis (disorder)
------------------------------------------------------------------------------------------
Encounter: 
2013-02-06: Prenatal visit  (class: ambulatory)
Procedure:
2013-02-06: Evaluation of uterine fundal height
------------------------------------------------------------------------------------------
Encounter: 
2013-03-06: Prenatal visit  (class: ambulatory)
Procedure:
2013-03-06: Fetal anatomy study
------------------------------------------------------------------------------------------
Encounter: 
2013-04-03: Prenatal visit  (class: ambulatory)
Procedure:
2013-04-03: Evaluation of uterine fundal height
------------------------------------------------------------------------------------------
Encounter: 
2013-05-01: Prenatal visit  (class: ambulatory)
Procedure:
2013-05-01: Evaluation of uterine fundal height
------------------------------------------------------------------------------------------
Encounter: 
2013-05-29: Prenatal visit  (class: ambulatory)
Procedure:
2013-05-29: Hemoglobin / Hematocrit / Platelet count
------------------------------------------------------------------------------------------
Encounter: 
2013-06-26: Prenatal visit  (class: ambulatory)
Procedure:
2013-06-26: Evaluation of uterine fundal height
------------------------------------------------------------------------------------------
Encounter: 
2013-07-24: Prenatal visit  (class: ambulatory)
Procedure:
2013-07-24: Streptococcus pneumoniae group B antigen test
------------------------------------------------------------------------------------------
Encounter: 
2013-08-07: Prenatal visit  (class: ambulatory)
Procedure:
2013-08-07: Evaluation of uterine fundal height
------------------------------------------------------------------------------------------
Encounter: 
2013-08-14: Obstetric emergency hospital admission  (class: emergency)
Procedure:
2013-08-14: Childbirth
------------------------------------------------------------------------------------------
Encounter: 
2013-09-25: Postnatal visit  (class: ambulatory)
Procedure:
2013-09-25: Physical examination following birth
------------------------------------------------------------------------------------------
Encounter: 
2013-11-13: General examination of patient (procedure)  (class: wellness)
Observations:
2013-11-13 18:19:08: Body Height: 162.4 cm
2013-11-13 18:19:08: Pain severity - 0-10 verbal numeric rating [Score] - Reported: 4.0 {score}
2013-11-13 18:19:08: Body Weight: 76.0 kg
2013-11-13 18:19:08: Body Mass Index: 28.8 kg/m2
2013-11-13 18:19:08: Diastolic Blood Pressure: 83.0 mm[Hg]
2013-11-13 18:19:08: Systolic Blood Pressure: 123.0 mm[Hg]
2013-11-13 18:19:08: Heart rate: 97.0 /min
2013-11-13 18:19:08: Respiratory rate: 15.0 /min
2013-11-13 18:19:08: Total Cholesterol: 188.8 mg/dL
2013-11-13 18:19:08: Triglycerides: 144.6 mg/dL
2013-11-13 18:19:08: Low Density Lipoprotein Cholesterol: 81.4 mg/dL
2013-11-13 18:19:08: High Density Lipoprotein Cholesterol: 78.5 mg/dL
2013-11-13 18:19:08: Tobacco smoking status NHIS: Never smoker 
Immunization:
2013-11-13: Influenza  seasonal  injectable  preservative free
------------------------------------------------------------------------------------------
Encounter: 
2014-01-23: Encounter for symptom  (class: ambulatory)
Condition:
2014-01-23 -- 2014-01-30: Viral sinusitis (disorder)
------------------------------------------------------------------------------------------
Encounter: 
2015-03-24: Encounter for symptom  (class: ambulatory)
Observations:
2015-03-24 18:19:08: Body temperature: 37.8 Cel
Condition:
2015-03-24 -- 2015-04-03: Acute viral pharyngitis (disorder)
------------------------------------------------------------------------------------------
Encounter: 
2015-11-28: Emergency room admission (procedure)  (class: emergency)
Condition:
2015-11-28 -- 2016-01-02: Whiplash injury to neck
Medications:
2015-11-28 -- 2016-01-02: Naproxen sodium 220 MG Oral Tablet
Care Plans:
2015-11-28 -- 2016-01-02: Musculoskeletal care
------------------------------------------------------------------------------------------
Encounter: 
2016-09-09: Consultation for treatment  (class: outpatient)
Medications:
2016-09-09 -- 2017-09-04: Errin 28 Day Pack
------------------------------------------------------------------------------------------
Encounter: 
2016-11-03: Encounter for symptom  (class: ambulatory)
Condition:
2016-11-03 -- 2016-11-17: Acute bronchitis (disorder)
Medications:
2016-11-03 -- 2016-11-17: Acetaminophen 325 MG Oral Tablet
Care Plans:
2016-11-03 -- 2019-11-20: Respiratory therapy
Procedure:
2016-11-03: Sputum examination (procedure)
------------------------------------------------------------------------------------------
Encounter: 
2016-11-16: General examination of patient (procedure)  (class: wellness)
Observations:
2016-11-16 18:19:08: Body Height: 162.4 cm
2016-11-16 18:19:08: Pain severity - 0-10 verbal numeric rating [Score] - Reported: 2.0 {score}
2016-11-16 18:19:08: Body Weight: 79.1 kg
2016-11-16 18:19:08: Body Mass Index: 30.0 kg/m2
2016-11-16 18:19:08: Diastolic Blood Pressure: 73.0 mm[Hg]
2016-11-16 18:19:08: Systolic Blood Pressure: 117.0 mm[Hg]
2016-11-16 18:19:08: Heart rate: 77.0 /min
2016-11-16 18:19:08: Respiratory rate: 16.0 /min
2016-11-16 18:19:08: Total Cholesterol: 177.5 mg/dL
2016-11-16 18:19:08: Triglycerides: 123.1 mg/dL
2016-11-16 18:19:08: Low Density Lipoprotein Cholesterol: 88.3 mg/dL
2016-11-16 18:19:08: High Density Lipoprotein Cholesterol: 64.6 mg/dL
2016-11-16 18:19:08: Leukocytes [#/volume] in Blood by Automated count: 4.2 10*3/uL
2016-11-16 18:19:08: Erythrocytes [#/volume] in Blood by Automated count: 4.7 10*6/uL
2016-11-16 18:19:08: Hemoglobin [Mass/volume] in Blood: 17.0 g/dL
2016-11-16 18:19:08: Hematocrit [Volume Fraction] of Blood by Automated count: 40.2 %
2016-11-16 18:19:08: MCV [Entitic volume] by Automated count: 94.6 fL
2016-11-16 18:19:08: MCH [Entitic mass] by Automated count: 31.4 pg
2016-11-16 18:19:08: MCHC [Mass/volume] by Automated count: 35.7 g/dL
2016-11-16 18:19:08: Erythrocyte distribution width [Entitic volume] by Automated count: 40.0 fL
2016-11-16 18:19:08: Platelets [#/volume] in Blood by Automated count: 388.0 10*3/uL
2016-11-16 18:19:08: Platelet distribution width [Entitic volume] in Blood by Automated count: 244.0 fL
2016-11-16 18:19:08: Platelet mean volume [Entitic volume] in Blood by Automated count: 9.9 fL
2016-11-16 18:19:08: Tobacco smoking status NHIS: Never smoker 
Condition:
2016-11-16 -- : Body mass index 30+ - obesity (finding)
Immunization:
2016-11-16: Influenza  seasonal  injectable  preservative free
------------------------------------------------------------------------------------------
Encounter: 
2017-09-04: Consultation for treatment  (class: outpatient)
------------------------------------------------------------------------------------------
Encounter: 
2017-09-11: Admission to surgical department  (class: inpatient)
Care Plans:
2017-09-11 -- 2017-09-25: Minor surgery care management (procedure)
Procedure:
2017-09-11: Bilateral tubal ligation
------------------------------------------------------------------------------------------
Encounter: 
2017-12-17: Encounter for symptom  (class: ambulatory)
Condition:
2017-12-17 -- 2017-12-24: Viral sinusitis (disorder)
------------------------------------------------------------------------------------------
Encounter: 
2019-11-20: General examination of patient (procedure)  (class: wellness)
Observations:
2019-11-20 18:19:08: Body Height: 162.4 cm
2019-11-20 18:19:08: Pain severity - 0-10 verbal numeric rating [Score] - Reported: 1.0 {score}
2019-11-20 18:19:08: Body Weight: 77.5 kg
2019-11-20 18:19:08: Body Mass Index: 29.4 kg/m2
2019-11-20 18:19:08: Diastolic Blood Pressure: 79.0 mm[Hg]
2019-11-20 18:19:08: Systolic Blood Pressure: 127.0 mm[Hg]
2019-11-20 18:19:08: Heart rate: 77.0 /min
2019-11-20 18:19:08: Respiratory rate: 12.0 /min
2019-11-20 18:19:08: Total Cholesterol: 187.6 mg/dL
2019-11-20 18:19:08: Triglycerides: 142.3 mg/dL
2019-11-20 18:19:08: Low Density Lipoprotein Cholesterol: 97.1 mg/dL
2019-11-20 18:19:08: High Density Lipoprotein Cholesterol: 62.0 mg/dL
2019-11-20 18:19:08: Tobacco smoking status NHIS: Never smoker 
Immunization:
2019-11-20: Influenza  seasonal  injectable  preservative free
Procedure:
2019-11-20: Medication Reconciliation (procedure)
------------------------------------------------------------------------------------------
In [18]:
encounters, observations, conditions, medications, careplans, immunizations, procedures = care_data_collection(patientID)
In [19]:
encounters['featureName'] = ['Encounters'] * encounters.shape[0]
conditions['featureName'] = ['Conditions'] * conditions.shape[0]
medications['featureName'] = ['Medications'] * medications.shape[0]
careplans['featureName'] = ['Careplans'] * careplans.shape[0]
immunizations['featureName'] = ['Immunizations'] * immunizations.shape[0]
procedures['featureName'] = ['Procedures'] * procedures.shape[0]
/tmp/ipykernel_91928/1069656180.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  encounters['featureName'] = ['Encounters'] * encounters.shape[0]
/tmp/ipykernel_91928/1069656180.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  conditions['featureName'] = ['Conditions'] * conditions.shape[0]
/tmp/ipykernel_91928/1069656180.py:3: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  medications['featureName'] = ['Medications'] * medications.shape[0]
/tmp/ipykernel_91928/1069656180.py:4: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  careplans['featureName'] = ['Careplans'] * careplans.shape[0]
/tmp/ipykernel_91928/1069656180.py:5: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  immunizations['featureName'] = ['Immunizations'] * immunizations.shape[0]
/tmp/ipykernel_91928/1069656180.py:6: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  procedures['featureName'] = ['Procedures'] * procedures.shape[0]
In [20]:
immunizations = immunizations.rename(columns = {"DATE": "START"})
procedures = procedures.rename(columns = {"DATE": "START"})
In [23]:
col_toUse = ['START', 'STOP', 'DESCRIPTION', 'featureName']

encounters_toUse = encounters[col_toUse + ['ENCOUNTERCLASS']]
conditions_toUse = conditions[col_toUse]
careplans_toUse = careplans[col_toUse]
medications_toUse = medications[col_toUse]
immunizations_toUse = immunizations[['START', 'DESCRIPTION', 'featureName']]
procedures_toUse = procedures[['START', 'DESCRIPTION', 'featureName']]
In [24]:
df_toUse = pd.concat([medications_toUse, careplans_toUse, conditions_toUse]).fillna('')
In [26]:
import plotly.express as px
 
fig = px.timeline(df_toUse.sort_values('START'),
                  x_start = "START",
                  x_end = "STOP",
                  y = "featureName",
                  text = "DESCRIPTION",
                  color = "featureName",
                  width = 2000, height = 300)

for i in range(encounters_toUse.shape[0]):
    if encounters_toUse['ENCOUNTERCLASS'].values[i] == 'urgentcare': 
        fig.add_vline(x = encounters_toUse['START'].values[i], line_width = 1, line_dash = "dash", line_color = "orange")
    if encounters_toUse['ENCOUNTERCLASS'].values[i] == 'emergency': 
        fig.add_vline(x = encounters_toUse['START'].values[i], line_width = 1, line_dash = "dash", line_color = "red")
    if encounters_toUse['ENCOUNTERCLASS'].values[i] == 'inpatient': 
        fig.add_vline(x = encounters_toUse['START'].values[i], line_width = 1, line_dash = "dash", line_color = "yellow")
    if encounters_toUse['ENCOUNTERCLASS'].values[i] == 'outpatient': 
        fig.add_vline(x = encounters_toUse['START'].values[i], line_width = 1, line_dash = "dash", line_color = "cyan")
    if encounters_toUse['ENCOUNTERCLASS'].values[i] == 'ambulatory': 
        fig.add_vline(x = encounters_toUse['START'].values[i], line_width = 1, line_dash = "dash", line_color = "lightblue")
    if encounters_toUse['ENCOUNTERCLASS'].values[i] == 'wellness': 
        fig.add_vline(x = encounters_toUse['START'].values[i], line_width = 1, line_dash = "dash", line_color = "lightgreen")    
    
    fig.add_annotation(x = encounters_toUse['START'].values[i], y = 1, yref = "paper", text = encounters_toUse['ENCOUNTERCLASS'].values[i])

fig.add_scatter(x = immunizations_toUse['START'], y = ['Immunizations'] * immunizations_toUse.shape[0], 
                mode = "markers", name = "Immunizations", marker = dict(size = 10))
fig.add_scatter(x = procedures_toUse['START'], y = ['Procedures'] * procedures_toUse.shape[0], 
                mode = "markers", name = "Procedures", marker = dict(size = 10))

fig.update_layout(font = dict(family="Courier New, monospace", size = 10))
fig.update_yaxes(title = '', showticklabels = True)
fig.show()
In [30]:
# remove a patient with condition on multiple encounters
conditions_byPatient = df['conditions'].drop_duplicates(subset = ['PATIENT', 'DESCRIPTION'],
                                                        keep = 'first').reset_index(drop = True)
In [31]:
condition_rank = pd.DataFrame(conditions_byPatient['DESCRIPTION'].value_counts()).reset_index()
condition_rank.columns = ['Conditions', 'Freq']
In [32]:
condition_rank.head(3)
Out[32]:
Conditions Freq
0 Viral sinusitis (disorder) 743
1 Acute viral pharyngitis (disorder) 492
2 Acute bronchitis (disorder) 464
In [29]:
import holoviews as hv
hv.extension('bokeh')

bars = hv.Bars(data = condition_rank)
bars.opts(width = 2000, height = 800, xrotation = 90, title = 'Conditions Rank', ylabel = 'Amount')
Out[29]:
In [34]:
top3 = list(condition_rank.head(3)['Conditions'].values)
In [35]:
conditions_top3 = conditions_byPatient[conditions_byPatient['DESCRIPTION'].isin(list(condition_rank.head(3)['Conditions'].values))]

race = []
ethnicity = []
gender = []
ifMarital = []

for patient in conditions_top3['PATIENT']:
    race.append(df['patients'][df['patients']['Id'] == patient]['RACE'].values[0])
    ethnicity.append(df['patients'][df['patients']['Id'] == patient]['ETHNICITY'].values[0])
    gender.append(df['patients'][df['patients']['Id'] == patient]['GENDER'].values[0])
    ifMarital.append(df['patients'][df['patients']['Id'] == patient]['MARITAL'].values[0])   

conditions_top3['race'] = race
conditions_top3['ethnicity'] = ethnicity
conditions_top3['gender'] = gender
conditions_top3['ifMarital'] = ifMarital
conditions_top3['ifMarital'] = conditions_top3['ifMarital'].fillna('Unknown')
/tmp/ipykernel_91928/817431530.py:14: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

/tmp/ipykernel_91928/817431530.py:15: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

/tmp/ipykernel_91928/817431530.py:16: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

/tmp/ipykernel_91928/817431530.py:17: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

/tmp/ipykernel_91928/817431530.py:18: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

In [36]:
conditions_top3_age = df['conditions'][df['conditions']['DESCRIPTION'].isin(top3)]

birthYear = []
for patient in conditions_top3_age['PATIENT']:
    birthDate = df['patients'][df['patients']['Id'] == patient]['BIRTHDATE'].values[0]
    birthYear.append(list(pd.DatetimeIndex([birthDate]).year))

conditions_top3_age['birthYear'] = np.asarray(birthYear).reshape(-1)
conditions_top3_age['conditionYear'] = pd.DatetimeIndex(conditions_top3_age['START']).year
conditions_top3_age['age'] = conditions_top3_age['conditionYear'] - conditions_top3_age['birthYear']
/tmp/ipykernel_91928/3164922842.py:8: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

/tmp/ipykernel_91928/3164922842.py:9: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

/tmp/ipykernel_91928/3164922842.py:10: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

In [38]:
from plotly.subplots import make_subplots
import plotly.graph_objects as go

fig1 = make_subplots(rows = 1, cols = 3, specs=[[{'type':'domain'}]*3], subplot_titles = (top3))
fig2 = make_subplots(rows = 1, cols = 3, specs=[[{'type':'domain'}]*3], subplot_titles = (top3))
fig3 = make_subplots(rows = 1, cols = 3, specs=[[{'type':'domain'}]*3], subplot_titles = (top3))
fig4 = make_subplots(rows = 1, cols = 3, specs=[[{'type':'domain'}]*3], subplot_titles = (top3))
fig5 = make_subplots(rows = 1, cols = 3, subplot_titles = (top3))

for i in range(3):
    cond_df = conditions_top3[conditions_top3['DESCRIPTION'] == top3[i]]
    gender = pd.DataFrame(cond_df['gender'].value_counts()).reset_index()
    race = pd.DataFrame(cond_df['race'].value_counts()).reset_index()
    ethnicity = pd.DataFrame(cond_df['ethnicity'].value_counts()).reset_index()
    ifMarital = pd.DataFrame(cond_df['ifMarital'].value_counts()).reset_index()
    
    fig1.add_trace(
        go.Pie(values = list(gender['gender'].values), labels = list(gender['index'].values)),
        row = 1, col = (i+1)) 
    fig2.add_trace(
        go.Pie(values = list(race['race'].values), labels = list(race['index'].values)),
        row = 1, col = (i+1)) 
    fig3.add_trace(
        go.Pie(values = list(ethnicity['ethnicity'].values), labels = list(ethnicity['index'].values)),
        row = 1, col = (i+1)) 
    fig4.add_trace(
        go.Pie(values = list(ifMarital['ifMarital'].values), labels = list(ifMarital['index'].values)),
        row = 1, col = (i+1)) 
    fig5.add_trace(
        go.Histogram(x = conditions_top3_age[conditions_top3_age['DESCRIPTION'] == top3[i]]['age'].values,
                     xbins = dict(
                     start = 0, end = 100, size = 10), # M18 stands for 18 months
                     autobinx = False
                     ), 
        row = 1, col = (i+1))

fig1.update_layout(title_text = "Gender")
fig1.show()
fig2.update_layout(title_text = "Race")
fig2.show()
fig3.update_layout(title_text = "Ethnicity")
fig3.show()
fig4.update_layout(title_text = "Marital Status")
fig4.show()
fig5.update_layout(width = 1000, height = 450, showlegend = False, title_text = "Age")
fig5.show()
In [31]:
encounterClass_eachCase = []
medication_eachCase = []
careplan_eachCase = []
# immunization_eachCase = []

for i in range(conditions_top3_age.shape[0]):
    el = df['encounters'][(df['encounters']['PATIENT'] == conditions_top3_age['PATIENT'].values[i]) 
                          & (df['encounters']['Id'] == conditions_top3_age['ENCOUNTER'].values[i])]  
    encounterClass_eachCase.append(el['ENCOUNTERCLASS'].values[0])
    
    med = df['medications'][(df['medications']['PATIENT'] == conditions_top3_age['PATIENT'].values[i]) 
                            & (df['medications']['ENCOUNTER'] == conditions_top3_age['ENCOUNTER'].values[i])]
    if med.shape[0] != 0:
        medication_eachCase.append(med['DESCRIPTION'].values[0])
    else:
        medication_eachCase.append('None')
        
    cp = df['careplans'][(df['careplans']['PATIENT'] == conditions_top3_age['PATIENT'].values[i]) 
                          & (df['careplans']['ENCOUNTER'] == conditions_top3_age['ENCOUNTER'].values[i])]
    if cp.shape[0] != 0:
        careplan_eachCase.append(cp['DESCRIPTION'].values[0])
    else:
        careplan_eachCase.append('None')
        
    '''
    imm = df['immunizations'][(df['immunizations']['PATIENT'] == conditions_top3_age['PATIENT'].values[i]) 
                              & (df['immunizations']['ENCOUNTER'] == conditions_top3_age['ENCOUNTER'].values[i])]
    if imm.shape[0] != 0:
        immunization_eachCase.append(imm['DESCRIPTION'].values[0])
    else:
        immunization_eachCase.append('None')
    '''
In [32]:
conditions_top3_age['encounterClass'] = encounterClass_eachCase
conditions_top3_age['medication'] = medication_eachCase
conditions_top3_age['careplan'] = careplan_eachCase
# conditions_top3_age['immunization'] = immunization_eachCase
/tmp/ipykernel_55933/1431142842.py:1: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

/tmp/ipykernel_55933/1431142842.py:2: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

/tmp/ipykernel_55933/1431142842.py:3: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

In [33]:
# set(immunization_eachCase)
In [34]:
fig6 = make_subplots(rows = 1, cols = 3, specs=[[{'type':'domain'}]*3], subplot_titles = (top3))
fig7 = make_subplots(rows = 1, cols = 3, specs=[[{'type':'domain'}]*3])
fig8 = make_subplots(rows = 1, cols = 3, specs=[[{'type':'domain'}]*3], subplot_titles = (top3))

for i in range(3):
    cond_df = conditions_top3_age[conditions_top3_age['DESCRIPTION'] == top3[i]]
    ec_count = pd.DataFrame(cond_df['encounterClass'].value_counts()).reset_index()
    med_count = pd.DataFrame(cond_df['medication'].value_counts()).reset_index()
    cp_count = pd.DataFrame(cond_df['careplan'].value_counts()).reset_index()
    
    fig6.add_trace(
        go.Pie(values = list(ec_count['encounterClass'].values), labels = list(ec_count['index'].values)),
        row = 1, col = (i+1)) 
    fig7.add_trace(
        go.Pie(values = list(med_count['medication'].values), labels = list(med_count['index'].values)),
        row = 1, col = (i+1)) 
    fig8.add_trace(
        go.Pie(values = list(cp_count['careplan'].values), labels = list(cp_count['index'].values)),
        row = 1, col = (i+1)) 

fig6.update_layout(title_text = "Encounter Class")
fig6.show()
fig7.update_layout(width = 900, height = 900, title_text = "Medications",
                   legend = dict(title_font_family = "Times New Roman", 
                                 font = dict(size = 10),
                                 orientation = "h"))
fig7.show()
fig8.update_layout(width = 1050, height = 600, title_text = "Care Plans",
                   legend = dict(title_font_family = "Times New Roman", 
                                 font = dict(size = 10)))
fig8.show()
In [35]:
# df['patients'][df['patients']['DEATHDATE'].notna()]
In [39]:
df['careplans']['DESCRIPTION'].value_counts()
Out[39]:
Respiratory therapy                                               633
Routine antenatal care                                            397
Diabetes self management plan                                     339
Lifestyle education regarding hypertension                        302
Physical therapy procedure                                        230
Self-care interventions (procedure)                               189
Musculoskeletal care                                              187
Wound care                                                        185
Fracture care                                                     169
Hyperlipidemia clinical management plan                           136
Head injury rehabilitation                                        102
Urinary tract infection care                                       76
Inpatient care plan (record artifact)                              59
Asthma self management                                             57
Heart failure self management plan                                 57
Cancer care plan                                                   49
Care plan (record artifact)                                        46
Burn care                                                          37
Minor surgery care management (procedure)                          31
Demential management                                               29
Allergic disorder monitoring                                       29
Chronic obstructive pulmonary disease clinical management plan     27
Skin condition care                                                25
Care Plan                                                          24
Overactivity/inattention behavior management                       20
Major surgery care management                                      15
Terminal care                                                      15
Psychiatry care plan                                                7
Mental health care plan                                             5
Dialysis care plan (record artifact)                                3
Major depressive disorder clinical management plan                  2
Spinal cord injury rehabilitation                                   1
Name: DESCRIPTION, dtype: int64
In [40]:
from datetime import datetime, timedelta
from collections import OrderedDict

# datetime.strptime(df['careplans']['STOP'].values[0], "%Y-%m-%d")-datetime.strptime(df['careplans']['START'].values[0], "%Y-%m-%d")
careplan_top1 = df['careplans'][df['careplans']['DESCRIPTION'] == 'Respiratory therapy']
In [38]:
# pd.set_option('display.max_rows', 1000)
In [39]:
'''
careDays = []
for i in range(careplan_top1.shape[0]):
    try: 
        diff = datetime.strptime(careplan_top1['STOP'].values[i], "%Y-%m-%d") - datetime.strptime(careplan_top1['START'].values[i], "%Y-%m-%d")
        careDays.append(diff)
    except:
        careDays.append(np.nan)
careplan_top1['careDays'] = careDays
np.mean(careplan_top1['careDays'])
'''
Out[39]:
'\ncareDays = []\nfor i in range(careplan_top1.shape[0]):\n    try: \n        diff = datetime.strptime(careplan_top1[\'STOP\'].values[i], "%Y-%m-%d") - datetime.strptime(careplan_top1[\'START\'].values[i], "%Y-%m-%d")\n        careDays.append(diff)\n    except:\n        careDays.append(np.nan)\ncareplan_top1[\'careDays\'] = careDays\nnp.mean(careplan_top1[\'careDays\'])\n'
In [41]:
careplan_top1 = careplan_top1[careplan_top1['STOP'] != '']
In [42]:
monthList = []
for i in range(careplan_top1.shape[0]):
    try: 
        diff = datetime.strptime(careplan_top1['STOP'].values[i], "%Y-%m-%d") - datetime.strptime(careplan_top1['START'].values[i], "%Y-%m-%d")
        monthList.append(diff)
    except:
        monthList.append(np.nan)
    
    start = datetime.strptime(careplan_top1['START'].values[i], "%Y-%m-%d")
    end = datetime.strptime(careplan_top1['STOP'].values[i], "%Y-%m-%d")
    mon = OrderedDict(((start + timedelta(_)).strftime(r"%b%Y"), None) for _ in range((end - start).days)).keys()
    monthList = monthList + list(mon)
In [43]:
monthList = [x for x in monthList if len(str(x)) == 7]
In [44]:
amount_byMonth = pd.DataFrame(monthList, columns = ['CareDate']).value_counts().reset_index()
amount_byMonth.columns = ['CareDate', 'Amount']
amount_byMonth['Year'] = [datetime.strptime(x, '%b%Y').year for x in amount_byMonth['CareDate']]
amount_byMonth['Month'] = [datetime.strptime(x, '%b%Y').month for x in amount_byMonth['CareDate']]
In [45]:
start = datetime.strptime('2005-01-01', "%Y-%m-%d")
end = datetime.strptime('2020-04-30', "%Y-%m-%d")
mon = list(OrderedDict(((start + timedelta(_)).strftime(r"%b%Y"), None) for _ in range((end - start).days)).keys())
In [46]:
amount = pd.DataFrame(mon, columns = ['CareDate']).merge(amount_byMonth[['CareDate', 'Amount']], how = 'left', on = 'CareDate')
amount = amount.fillna(0)
In [93]:
mu = amount['Amount'].mean()
sd = amount['Amount'].std()

amount_norm = amount.copy()

# Normalize data
amount_norm['Amount'] = (amount['Amount'] - mu) / sd
In [94]:
train_days = 6 #months
x = []
y = []
num = 0
for i in range(train_days, amount_norm.shape[0]):
    x.append(list(amount_norm['Amount'][num:i].values))
    y.append(amount_norm['Amount'][i])
    num += 1
In [95]:
X = np.expand_dims(np.array(x), -1)
Y = np.array(y)
In [96]:
train_num = X.shape[0]-12
X_train = X[0:train_num]
Y_train = Y[0:train_num]

X_test = X[train_num:X.shape[0]]
Y_test = Y[train_num:Y.shape[0]]
In [97]:
from tensorflow.keras.layers import Input, Dense, LeakyReLU, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import Model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras import regularizers
from tensorflow.keras.layers import LSTM
from tensorflow.keras.regularizers import l1, l2
In [98]:
seq_input = Input(shape = (X_train.shape[1], X_train.shape[2]))

x = LSTM(128, kernel_regularizer = l2(0.002), recurrent_regularizer = l2(0.002), bias_regularizer = l2(0.002),
         return_sequences = False)(seq_input)
x = Dropout(0.2)(x)
out = Dense(1, activation = 'linear')(x)
net = Model(seq_input, out)
net.summary()
Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input_3 (InputLayer)        [(None, 6, 1)]            0         
                                                                 
 lstm_2 (LSTM)               (None, 128)               66560     
                                                                 
 dropout_2 (Dropout)         (None, 128)               0         
                                                                 
 dense_2 (Dense)             (None, 1)                 129       
                                                                 
=================================================================
Total params: 66,689
Trainable params: 66,689
Non-trainable params: 0
_________________________________________________________________
In [99]:
net.compile(loss = 'mse', optimizer = Adam(0.001))
es = EarlyStopping(monitor = 'val_loss', mode = 'min', verbose = 1, patience = 10) 
bm = ModelCheckpoint('../net_weights.hdf5', save_best_only = True, 
                     monitor = 'val_loss', mode = 'min')
net.fit(X_train, Y_train, epochs = 200, batch_size = 8, validation_split = 0.1, callbacks = [es, bm])
Epoch 1/200
19/19 [==============================] - 3s 42ms/step - loss: 0.9256 - val_loss: 0.6269
Epoch 2/200
19/19 [==============================] - 0s 12ms/step - loss: 0.5790 - val_loss: 0.5032
Epoch 3/200
19/19 [==============================] - 0s 11ms/step - loss: 0.4838 - val_loss: 0.4835
Epoch 4/200
19/19 [==============================] - 0s 10ms/step - loss: 0.4363 - val_loss: 0.4440
Epoch 5/200
19/19 [==============================] - 0s 9ms/step - loss: 0.4022 - val_loss: 0.4198
Epoch 6/200
19/19 [==============================] - 0s 10ms/step - loss: 0.3821 - val_loss: 0.3966
Epoch 7/200
19/19 [==============================] - 0s 10ms/step - loss: 0.3628 - val_loss: 0.3749
Epoch 8/200
19/19 [==============================] - 0s 9ms/step - loss: 0.3458 - val_loss: 0.3638
Epoch 9/200
19/19 [==============================] - 0s 9ms/step - loss: 0.3263 - val_loss: 0.3506
Epoch 10/200
19/19 [==============================] - 0s 9ms/step - loss: 0.3146 - val_loss: 0.3381
Epoch 11/200
19/19 [==============================] - 0s 9ms/step - loss: 0.3034 - val_loss: 0.3200
Epoch 12/200
19/19 [==============================] - 0s 9ms/step - loss: 0.2910 - val_loss: 0.3167
Epoch 13/200
19/19 [==============================] - 0s 10ms/step - loss: 0.2744 - val_loss: 0.3000
Epoch 14/200
19/19 [==============================] - 0s 10ms/step - loss: 0.2637 - val_loss: 0.2955
Epoch 15/200
19/19 [==============================] - 0s 10ms/step - loss: 0.2576 - val_loss: 0.2825
Epoch 16/200
19/19 [==============================] - 0s 10ms/step - loss: 0.2432 - val_loss: 0.2623
Epoch 17/200
19/19 [==============================] - 0s 10ms/step - loss: 0.2372 - val_loss: 0.2587
Epoch 18/200
19/19 [==============================] - 0s 10ms/step - loss: 0.2248 - val_loss: 0.2399
Epoch 19/200
19/19 [==============================] - 0s 11ms/step - loss: 0.2206 - val_loss: 0.2287
Epoch 20/200
19/19 [==============================] - 0s 10ms/step - loss: 0.2087 - val_loss: 0.2198
Epoch 21/200
19/19 [==============================] - 0s 10ms/step - loss: 0.2003 - val_loss: 0.2131
Epoch 22/200
19/19 [==============================] - 0s 10ms/step - loss: 0.1900 - val_loss: 0.2073
Epoch 23/200
19/19 [==============================] - 0s 9ms/step - loss: 0.1877 - val_loss: 0.2182
Epoch 24/200
19/19 [==============================] - 0s 10ms/step - loss: 0.1834 - val_loss: 0.2021
Epoch 25/200
19/19 [==============================] - 0s 10ms/step - loss: 0.1697 - val_loss: 0.1964
Epoch 26/200
19/19 [==============================] - 0s 10ms/step - loss: 0.1640 - val_loss: 0.1877
Epoch 27/200
19/19 [==============================] - 0s 9ms/step - loss: 0.1643 - val_loss: 0.1765
Epoch 28/200
19/19 [==============================] - 0s 9ms/step - loss: 0.1540 - val_loss: 0.1939
Epoch 29/200
19/19 [==============================] - 0s 11ms/step - loss: 0.1493 - val_loss: 0.1710
Epoch 30/200
19/19 [==============================] - 0s 12ms/step - loss: 0.1481 - val_loss: 0.1656
Epoch 31/200
19/19 [==============================] - 0s 11ms/step - loss: 0.1390 - val_loss: 0.1654
Epoch 32/200
19/19 [==============================] - 0s 11ms/step - loss: 0.1369 - val_loss: 0.1520
Epoch 33/200
19/19 [==============================] - 0s 10ms/step - loss: 0.1305 - val_loss: 0.1577
Epoch 34/200
19/19 [==============================] - 0s 10ms/step - loss: 0.1279 - val_loss: 0.1517
Epoch 35/200
19/19 [==============================] - 0s 11ms/step - loss: 0.1231 - val_loss: 0.1463
Epoch 36/200
19/19 [==============================] - 0s 11ms/step - loss: 0.1174 - val_loss: 0.1362
Epoch 37/200
19/19 [==============================] - 0s 11ms/step - loss: 0.1151 - val_loss: 0.1390
Epoch 38/200
19/19 [==============================] - 0s 8ms/step - loss: 0.1094 - val_loss: 0.1440
Epoch 39/200
19/19 [==============================] - 0s 12ms/step - loss: 0.1111 - val_loss: 0.1320
Epoch 40/200
19/19 [==============================] - 0s 11ms/step - loss: 0.1049 - val_loss: 0.1279
Epoch 41/200
19/19 [==============================] - 0s 11ms/step - loss: 0.1026 - val_loss: 0.1305
Epoch 42/200
19/19 [==============================] - 0s 10ms/step - loss: 0.1034 - val_loss: 0.1242
Epoch 43/200
19/19 [==============================] - 0s 15ms/step - loss: 0.0969 - val_loss: 0.1200
Epoch 44/200
19/19 [==============================] - 0s 11ms/step - loss: 0.0965 - val_loss: 0.1182
Epoch 45/200
19/19 [==============================] - 0s 9ms/step - loss: 0.0933 - val_loss: 0.1222
Epoch 46/200
19/19 [==============================] - 0s 10ms/step - loss: 0.0906 - val_loss: 0.1075
Epoch 47/200
19/19 [==============================] - 0s 11ms/step - loss: 0.0886 - val_loss: 0.1037
Epoch 48/200
19/19 [==============================] - 0s 10ms/step - loss: 0.0906 - val_loss: 0.1080
Epoch 49/200
19/19 [==============================] - 0s 9ms/step - loss: 0.0849 - val_loss: 0.1072
Epoch 50/200
19/19 [==============================] - 0s 9ms/step - loss: 0.0836 - val_loss: 0.1116
Epoch 51/200
19/19 [==============================] - 0s 10ms/step - loss: 0.0800 - val_loss: 0.0997
Epoch 52/200
19/19 [==============================] - 0s 9ms/step - loss: 0.0790 - val_loss: 0.1006
Epoch 53/200
19/19 [==============================] - 0s 9ms/step - loss: 0.0749 - val_loss: 0.1022
Epoch 54/200
19/19 [==============================] - 0s 11ms/step - loss: 0.0772 - val_loss: 0.0941
Epoch 55/200
19/19 [==============================] - 0s 9ms/step - loss: 0.0748 - val_loss: 0.1053
Epoch 56/200
19/19 [==============================] - 0s 9ms/step - loss: 0.0748 - val_loss: 0.0965
Epoch 57/200
19/19 [==============================] - 0s 10ms/step - loss: 0.0711 - val_loss: 0.0893
Epoch 58/200
19/19 [==============================] - 0s 10ms/step - loss: 0.0720 - val_loss: 0.0922
Epoch 59/200
19/19 [==============================] - 0s 9ms/step - loss: 0.0672 - val_loss: 0.0936
Epoch 60/200
19/19 [==============================] - 0s 10ms/step - loss: 0.0654 - val_loss: 0.0913
Epoch 61/200
19/19 [==============================] - 0s 11ms/step - loss: 0.0644 - val_loss: 0.0902
Epoch 62/200
19/19 [==============================] - 0s 11ms/step - loss: 0.0628 - val_loss: 0.0890
Epoch 63/200
19/19 [==============================] - 0s 10ms/step - loss: 0.0666 - val_loss: 0.0934
Epoch 64/200
19/19 [==============================] - 0s 9ms/step - loss: 0.0605 - val_loss: 0.0946
Epoch 65/200
19/19 [==============================] - 0s 10ms/step - loss: 0.0644 - val_loss: 0.0850
Epoch 66/200
19/19 [==============================] - 0s 9ms/step - loss: 0.0621 - val_loss: 0.0855
Epoch 67/200
19/19 [==============================] - 0s 11ms/step - loss: 0.0586 - val_loss: 0.0826
Epoch 68/200
19/19 [==============================] - 0s 10ms/step - loss: 0.0589 - val_loss: 0.0881
Epoch 69/200
19/19 [==============================] - 0s 10ms/step - loss: 0.0575 - val_loss: 0.0839
Epoch 70/200
19/19 [==============================] - 0s 10ms/step - loss: 0.0605 - val_loss: 0.0819
Epoch 71/200
19/19 [==============================] - 0s 10ms/step - loss: 0.0613 - val_loss: 0.0898
Epoch 72/200
19/19 [==============================] - 0s 10ms/step - loss: 0.0542 - val_loss: 0.0787
Epoch 73/200
19/19 [==============================] - 0s 10ms/step - loss: 0.0569 - val_loss: 0.0775
Epoch 74/200
19/19 [==============================] - 0s 10ms/step - loss: 0.0557 - val_loss: 0.0794
Epoch 75/200
19/19 [==============================] - 0s 9ms/step - loss: 0.0555 - val_loss: 0.0780
Epoch 76/200
19/19 [==============================] - 0s 10ms/step - loss: 0.0549 - val_loss: 0.0719
Epoch 77/200
19/19 [==============================] - 0s 10ms/step - loss: 0.0566 - val_loss: 0.0772
Epoch 78/200
19/19 [==============================] - 0s 10ms/step - loss: 0.0581 - val_loss: 0.0763
Epoch 79/200
19/19 [==============================] - 0s 8ms/step - loss: 0.0535 - val_loss: 0.0741
Epoch 80/200
19/19 [==============================] - 0s 8ms/step - loss: 0.0521 - val_loss: 0.0745
Epoch 81/200
19/19 [==============================] - 0s 10ms/step - loss: 0.0530 - val_loss: 0.0744
Epoch 82/200
19/19 [==============================] - 0s 9ms/step - loss: 0.0514 - val_loss: 0.0779
Epoch 83/200
19/19 [==============================] - 0s 9ms/step - loss: 0.0539 - val_loss: 0.0790
Epoch 84/200
19/19 [==============================] - 0s 9ms/step - loss: 0.0484 - val_loss: 0.0712
Epoch 85/200
19/19 [==============================] - 0s 8ms/step - loss: 0.0492 - val_loss: 0.0715
Epoch 86/200
19/19 [==============================] - 0s 8ms/step - loss: 0.0480 - val_loss: 0.0749
Epoch 87/200
19/19 [==============================] - 0s 9ms/step - loss: 0.0487 - val_loss: 0.0790
Epoch 88/200
19/19 [==============================] - 0s 9ms/step - loss: 0.0527 - val_loss: 0.0787
Epoch 89/200
19/19 [==============================] - 0s 9ms/step - loss: 0.0579 - val_loss: 0.0721
Epoch 90/200
19/19 [==============================] - 0s 9ms/step - loss: 0.0529 - val_loss: 0.0722
Epoch 91/200
19/19 [==============================] - 0s 10ms/step - loss: 0.0550 - val_loss: 0.0708
Epoch 92/200
19/19 [==============================] - 0s 10ms/step - loss: 0.0502 - val_loss: 0.0706
Epoch 93/200
19/19 [==============================] - 0s 9ms/step - loss: 0.0459 - val_loss: 0.0764
Epoch 94/200
19/19 [==============================] - 0s 9ms/step - loss: 0.0494 - val_loss: 0.0789
Epoch 95/200
19/19 [==============================] - 0s 10ms/step - loss: 0.0508 - val_loss: 0.0692
Epoch 96/200
19/19 [==============================] - 0s 9ms/step - loss: 0.0497 - val_loss: 0.0707
Epoch 97/200
19/19 [==============================] - 0s 9ms/step - loss: 0.0476 - val_loss: 0.0775
Epoch 98/200
19/19 [==============================] - 0s 9ms/step - loss: 0.0475 - val_loss: 0.0684
Epoch 99/200
19/19 [==============================] - 0s 9ms/step - loss: 0.0487 - val_loss: 0.0744
Epoch 100/200
19/19 [==============================] - 0s 8ms/step - loss: 0.0510 - val_loss: 0.0770
Epoch 101/200
19/19 [==============================] - 0s 8ms/step - loss: 0.0508 - val_loss: 0.0744
Epoch 102/200
19/19 [==============================] - 0s 8ms/step - loss: 0.0455 - val_loss: 0.0699
Epoch 103/200
19/19 [==============================] - 0s 8ms/step - loss: 0.0484 - val_loss: 0.0760
Epoch 104/200
19/19 [==============================] - 0s 10ms/step - loss: 0.0449 - val_loss: 0.0681
Epoch 105/200
19/19 [==============================] - 0s 10ms/step - loss: 0.0455 - val_loss: 0.0687
Epoch 106/200
19/19 [==============================] - 0s 9ms/step - loss: 0.0501 - val_loss: 0.0709
Epoch 107/200
19/19 [==============================] - 0s 8ms/step - loss: 0.0457 - val_loss: 0.0747
Epoch 108/200
19/19 [==============================] - 0s 9ms/step - loss: 0.0459 - val_loss: 0.0691
Epoch 109/200
19/19 [==============================] - 0s 8ms/step - loss: 0.0462 - val_loss: 0.0690
Epoch 110/200
19/19 [==============================] - 0s 10ms/step - loss: 0.0434 - val_loss: 0.0712
Epoch 111/200
19/19 [==============================] - 0s 9ms/step - loss: 0.0466 - val_loss: 0.0714
Epoch 112/200
19/19 [==============================] - 0s 8ms/step - loss: 0.0411 - val_loss: 0.0736
Epoch 113/200
19/19 [==============================] - 0s 8ms/step - loss: 0.0462 - val_loss: 0.0700
Epoch 114/200
19/19 [==============================] - 0s 10ms/step - loss: 0.0475 - val_loss: 0.0673
Epoch 115/200
19/19 [==============================] - 0s 8ms/step - loss: 0.0456 - val_loss: 0.0681
Epoch 116/200
19/19 [==============================] - 0s 8ms/step - loss: 0.0453 - val_loss: 0.0713
Epoch 117/200
19/19 [==============================] - 0s 8ms/step - loss: 0.0432 - val_loss: 0.0776
Epoch 118/200
19/19 [==============================] - 0s 9ms/step - loss: 0.0491 - val_loss: 0.0657
Epoch 119/200
19/19 [==============================] - 0s 9ms/step - loss: 0.0491 - val_loss: 0.0708
Epoch 120/200
19/19 [==============================] - 0s 8ms/step - loss: 0.0464 - val_loss: 0.0674
Epoch 121/200
19/19 [==============================] - 0s 8ms/step - loss: 0.0451 - val_loss: 0.0706
Epoch 122/200
19/19 [==============================] - 0s 8ms/step - loss: 0.0472 - val_loss: 0.0696
Epoch 123/200
19/19 [==============================] - 0s 8ms/step - loss: 0.0465 - val_loss: 0.0710
Epoch 124/200
19/19 [==============================] - 0s 9ms/step - loss: 0.0467 - val_loss: 0.0748
Epoch 125/200
19/19 [==============================] - 0s 9ms/step - loss: 0.0458 - val_loss: 0.0721
Epoch 126/200
19/19 [==============================] - 0s 9ms/step - loss: 0.0446 - val_loss: 0.0668
Epoch 127/200
19/19 [==============================] - 0s 9ms/step - loss: 0.0466 - val_loss: 0.0748
Epoch 128/200
19/19 [==============================] - 0s 10ms/step - loss: 0.0480 - val_loss: 0.0694
Epoch 128: early stopping
Out[99]:
<keras.callbacks.History at 0x7facf0404c40>
In [100]:
net.load_weights(filepath = '../net_weights.hdf5')
pred = net.predict(X_test)
In [102]:
result = pd.DataFrame({'CareDate':list(amount_norm['CareDate'][172:amount_norm['CareDate'].shape[0]].values), 'RealValue': Y_test * sd + mu,'Prediction': (pred * sd + mu).reshape(-1)})
In [103]:
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(50, 12))

ax.plot(result['CareDate'], result['Prediction'], linewidth = 3, marker = 'o', markersize = 10, label = 'prediction')
ax.plot(result['CareDate'], result['RealValue'], linewidth = 3, marker = 'o', markersize = 10, label = 'real value')
ax.set_xlabel('Date', fontsize = 30)
ax.set_ylabel('Amount of Cares', fontsize = 30)
ax.tick_params(axis = 'x', labelsize = 25)
ax.tick_params(axis = 'y', labelsize = 25)
ax.legend(fontsize = 30)
ax.grid(True)
In [104]:
patientD = df['patients'][df['patients']['DEATHDATE'].notna()]
patientA = df['patients'][df['patients']['DEATHDATE'].isna()]
In [105]:
patientA_sub = patientA.sample(n = patientD.shape[0], replace = False, random_state = 2)
patientA_sub['DEATHDATE'] = patientA_sub['DEATHDATE'].fillna('2023-04-10')
In [106]:
patientA_sub['ifDead'] = [0] * patientD.shape[0]
patientD['ifDead'] = [1] * patientD.shape[0]
patient_toUse = pd.concat([patientA_sub, patientD])
/tmp/ipykernel_91928/3939563891.py:2: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

In [108]:
patient_toUse.columns
Out[108]:
Index(['Id', 'BIRTHDATE', 'DEATHDATE', 'SSN', 'DRIVERS', 'PASSPORT', 'PREFIX',
       'FIRST', 'LAST', 'SUFFIX', 'MAIDEN', 'MARITAL', 'RACE', 'ETHNICITY',
       'GENDER', 'BIRTHPLACE', 'ADDRESS', 'CITY', 'STATE', 'COUNTY', 'ZIP',
       'LAT', 'LON', 'HEALTHCARE_EXPENSES', 'HEALTHCARE_COVERAGE', 'Name',
       'Address', 'ifDead'],
      dtype='object')
In [109]:
patient_col = ['Id', 'BIRTHDATE', 'DEATHDATE', 'MARITAL', 'RACE', 'ETHNICITY', 'GENDER', 'LAT', 'LON', 'HEALTHCARE_EXPENSES', 'HEALTHCARE_COVERAGE', 'ifDead']
patient_toUse = patient_toUse[patient_col].reset_index(drop = True)
patient_toUse['age'] = pd.DatetimeIndex(patient_toUse['DEATHDATE']).year - pd.DatetimeIndex(patient_toUse['BIRTHDATE']).year
In [145]:
# encounterClass = list(set(df['encounters']['ENCOUNTERCLASS']))
# encounterClass
In [110]:
encounter_count = []
medication_dispenses = []
immunization_record = []
care_record = []
procedure_record = []

for patient in patient_toUse['Id']:
    encounter = []
    for ec in list(set(df['encounters']['ENCOUNTERCLASS'])):
        ec_count = df['encounters'][df['encounters']['PATIENT'] == patient][df['encounters']['ENCOUNTERCLASS'] == ec]
        encounter.append(ec_count.shape[0])
    encounter_count.append(encounter)
    
    med_info = df['medications'][df['medications']['PATIENT'] == patient]
    if med_info.shape != 0:
        medication_dispenses.append(sum(med_info['DISPENSES'].values))
    else:
        medication_dispenses.append(0)
    
    imm_info = df['immunizations'][df['immunizations']['PATIENT'] == patient]
    immunization_record.append(imm_info.shape[0])
    
    care_info = df['careplans'][df['careplans']['PATIENT'] == patient]
    if care_info.shape[0] == 0:
        care_record.append((0, 0))
    else:
        care_info_sub = care_info[care_info['STOP'] != '']
        care_days = sum([(datetime.strptime(care_info_sub['STOP'].values[k], "%Y-%m-%d")-datetime.strptime(care_info_sub['START'].values[k], "%Y-%m-%d")).days for k in range(care_info_sub.shape[0])]) 
        care_record.append((care_info[care_info['STOP'] == ''].shape[0], care_days))
        
    procedure_info = df['procedures'][df['procedures']['PATIENT'] == patient]
    procedure_record.append(procedure_info.shape[0])
/tmp/ipykernel_91928/1428586908.py:10: UserWarning:

Boolean Series key will be reindexed to match DataFrame index.

In [111]:
patient_toUse['medicationDispenses'] = medication_dispenses
patient_toUse['immunizationRecord'] = immunization_record
patient_toUse['procedureRecord'] = procedure_record
patient_toUse = patient_toUse.join(pd.DataFrame(encounter_count, columns = list(set(df['encounters']['ENCOUNTERCLASS']))))
patient_toUse = patient_toUse.join(pd.DataFrame(care_record, columns = ['longtermCareplan(times)', 'shorttermCareplan(days)']))
In [112]:
col_toDrop = ['Id', 'BIRTHDATE', 'DEATHDATE']
patient_toUse = patient_toUse.drop(col_toDrop, axis = 1)
In [113]:
to_predict = ['ifDead']
x_cols = [x for x in list(patient_toUse) if x not in to_predict]
In [114]:
x_cat = ['MARITAL', 'RACE', 'ETHNICITY', 'GENDER']
x_num = [x for x in x_cols if x not in x_cat]
In [117]:
df_onehot = pd.get_dummies(patient_toUse[x_cat], 
                           prefix = {'MARITAL':'MARITAL', 'RACE':'RACE', 'ETHNICITY':'ETHNICITY', 'GENDER':'GENDER'},
                           drop_first = True)
In [120]:
mu = patient_toUse[x_num].mean(0)
sd = patient_toUse[x_num].std(0)

# Normalize data
df_num = (patient_toUse[x_num] - mu) / sd
Y = patient_toUse[to_predict]
X = df_num.join(df_onehot)
In [123]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size = 0.1, random_state = 0)
In [124]:
from sklearn.ensemble import RandomForestClassifier
from sklearn import metrics

import random

# random forest
rv = np.random.randint(0, 10, x.shape[0])
clf = RandomForestClassifier(n_estimators = 100, max_depth = 50)

clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)

# neural networks
layer_size = [128, 512, 64]

inp = Input(x_train.shape[1:])
out = inp
for ls in layer_size:
    out = Dense(ls, activation = "relu")(out)
    out = Dropout(0.2)(out)
out = Dense(1, activation = "sigmoid")(out)

    
net = Model(inp, out)
net.compile(loss = "binary_crossentropy", optimizer = Adam(0.001), metrics = ['accuracy'])

mcp_save = ModelCheckpoint('../weights.hdf5', save_best_only = True, monitor = 'val_accuracy', mode = 'max')
     #callbacks=[mcp_save],  validation_split=0.15,
net.fit(x_train, y_train, epochs = 250, batch_size = 16, verbose = 0, validation_split = 0.1, callbacks = [mcp_save])
test_loss, test_acc = net.evaluate(x_test, y_test)

# random guess (baseline)
y_guess = random.choices([0, 1], [0.5, 0.5], k = len(y_test))
/tmp/ipykernel_91928/3924046214.py:10: DataConversionWarning:

A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().

2/2 [==============================] - 0s 3ms/step - loss: 3.2411 - accuracy: 0.7143
In [125]:
print("RF Accuracy:", metrics.accuracy_score(y_test, y_pred))
print("NN Accuracy:", test_acc)
print("RG Accuracy:", metrics.accuracy_score(y_test, y_guess))
RF Accuracy: 0.7428571428571429
NN Accuracy: 0.7142857313156128
RG Accuracy: 0.4857142857142857
In [126]:
import shap
shap.initjs()

explainer = shap.TreeExplainer(clf)
shap_values = explainer.shap_values(x_train, approximate = False, check_additivity = False)

shap.summary_plot(shap_values[1], x_train)